# @package _group_

batch_size: 16
dev_batch_size: 64
adam_eps: 1e-8
adam_betas: (0.9, 0.999)
max_grad_norm: 2.0
log_batch_step: 10
train_rolling_loss_step: 100
weight_decay: 0.0
learning_rate: 2e-5

# Linear warmup over warmup_steps.
warmup_steps: 1237

# Number of updates steps to accumulate before performing a backward/update pass.
gradient_accumulation_steps: 1

# Total number of training epochs to perform.
num_train_epochs: 40
eval_per_epoch: 1
hard_negatives: 1
other_negatives: 0
val_av_rank_hard_neg: 30
val_av_rank_other_neg: 30
val_av_rank_bsz: 128
val_av_rank_max_qs: 10000